Data preparation

First, we create a dataset containing element network information joined with other attributes of elements. We specifically only consider elements that are known to form at least five minerals on Earth since 4.33 Ga. The following elements are therefore excluded (in parentheses is given the number of minerals it forms): Dy (1), Er (1), Gd (1), Hf (1), Sm (1), Re (2), Rb (3).

# Prepare if file not present. Otherwise, read in the file.
if (file.exists(data_file)) {
  element_networks_info <- read_csv(data_file)
} else {
  mineral_threshold <- 5

  dragon:::element_info %>%
    # Remove those not present **since** 4.33 Ga
    filter(element != "Tc") -> elements
  
  purrr::map_df(elements$element, parse_element_network) %>%
    left_join(elements) %>%
    mutate(n_elements = as.numeric(n_elements),
           n_minerals = as.numeric(n_minerals),
           n_localities = as.numeric(n_localities)) %>%
    filter(n_minerals >= mineral_threshold) -> element_networks_info
  
  # Save to file
  write_csv(element_networks_info, data_file)
}

Here is the full dataset used for analysis:

datatable(element_networks_info)



Analysis 1: What is the relationship between number of elements interacted with and number of minerals formed? This analysis asks if number of elements can explain number of minerals.


5 First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.

# regression

# No
#lm(n_minerals ~ n_elements, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)

# Yes
lm(log(n_minerals) ~ n_elements, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)

# No
#lm(n_minerals ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)

# No
#lm(log(n_minerals) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)



The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.

get_model_info(model_ylog, "n_elements") -> model_info


make_plot(
  element_networks_info %>% mutate(n_minerals_log = log(n_minerals)),
  n_elements,
  n_minerals_log,
  model_info,
  "Number of Elements",
  "Log Number of Minerals",
  c("C", "H", "O", "N", "P", "S"),
  4 #seed
) -> model1_plot


model1_plot

Analysis 2: What is the relationship between number of elements interacted with and number of localities it is found at? This analysis asks if number of elements can explain number of localities.


First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.

# regression

# No
#lm(n_localities ~ n_elements, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)

# Yes
lm(log(n_localities) ~ n_elements, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)

# No
#lm(n_localities ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)

# No
#lm(log(n_localities) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)



The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.

get_model_info(model_ylog, "n_elements") -> model_info

make_plot(
  element_networks_info %>% mutate(n_localities_log = log(n_localities)),
  n_elements,
  n_localities_log,
  model_info,
  "Number of Elements",
  "Log Number of Localities",
  c("C", "H", "O", "N", "Au", "P"),
  10
) -> model2_plot

# S and P have to be labeled separately
model2_plot + 
  geom_text(
    aes(label = ifelse(element == "S", "S", "")),
    nudge_y = 0.2,
    nudge_x = -1,
    fontface = "bold",
    color = label.color,
    size = label.size
  ) 

model2_plot

Analysis 3: What is the relationship between number of elements interacted with and percentage of crust? This analysis asks if element crust percentage by weight can explain the number of elements.

Note that there are six elements which do not appear in this analysis because they are missing crust data - C, H, N, REE (rare earth elements), Rh, Te.


First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log x and untransformed y, whose diagnostics best meet assumptions.


# regression

# No
#lm(n_elements ~ element_crust_percent_weight, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)

# No
#lm(log(n_elements) ~ element_crust_percent_weight, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)

# Yes
lm(n_elements ~ log(element_crust_percent_weight), data = element_networks_info) -> model_xlog
performance::check_model(model_xlog)

# No
#lm(log(n_elements) ~ log(element_crust_percent_weight), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)



The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.

# Since we have a logged x, have to remake model:
element_networks_info %>%
  mutate(element_crust_percent_weight_log = log(element_crust_percent_weight)) -> forfit
lm(n_elements ~ element_crust_percent_weight_log, data = forfit) -> model_xlog
get_model_info(model_xlog, "element_crust_percent_weight_log") -> model_info


make_plot(
  element_networks_info %>% 
    mutate(element_crust_percent_weight_log = log(element_crust_percent_weight)),
  element_crust_percent_weight_log,
  n_elements,
  model_info,
  "Log crust percentage",
  "Number of Elements",
  c("As", "Pb", "Cu", "S", "O", "Fe", "Mn", "Zn", "P", "Mo", "Co", "Br", "Sc", "Yb", "Ga"),
  11
) -> model3_plot

# P and Ni separately, except the bottom P just allows the first P....
model3_plot + 
  # This is an insane hack for P and I DO NOT REGRET IT!!
  geom_text(
    aes(label = ifelse(element == "P", "P", "")),
    nudge_y = 2,
    nudge_x = 0,
    fontface = "bold",
    color = "white",
    size = label.size
  ) +
  geom_text(
    aes(label = ifelse(element == "Ni", "Ni", "")),
    nudge_y = -2.5,
    nudge_x = -0.4,
    fontface = "bold",
    color = label.color,
    size = label.size
  )

model3_plot

Analysis 4: What is the relationship between number of elements interacted with and electronegativity? This analysis asks if the number of elements can explain the electronegativity.


First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with both untransformed y and x, whose diagnostics best meet assumptions.


# Yes
lm(pauling ~ n_elements, data = element_networks_info) -> model_plain
performance::check_model(model_plain)

# No
#lm(log(pauling) ~ n_elements, data = element_networks_info) -> model_ylog
#performance::check_model(model_ylog)

# No
#lm(pauling ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)

# No
#lm(log(pauling) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)



The resulting model is as follows - THERE IS NO RELATIONSHIP.:

get_model_info(model_plain, "n_elements") -> model_info


make_plot(
  element_networks_info,
  n_elements,
  pauling,
  model_info,
  "Number of Elements",
  "Pauling electronegativity"
) -> model4_plot



model4_plot

Analysis 5: What is the relationship between number of minerals formed and electronegativity? This analysis asks if the number of minerals can explain electronegativity.


First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.


# No
#lm(n_minerals ~ pauling, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)

# Yes
lm(log(n_minerals) ~ pauling, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)

# No
#lm(n_minerals ~ log(pauling), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)

# No but close
#lm(log(pauling) ~ log(n_minerals), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)



The resulting model is as follows - THERE IS NO RELATIONSHIP.

get_model_info(model_ylog, "pauling") -> model_info


make_plot(
  element_networks_info %>% mutate(n_minerals_log = log(n_minerals)),
  pauling,
  n_minerals_log,
  model_info,
  "Pauling electronegativity",
  "Log number of minerals formed"
) -> model5_plot



model5_plot

Analysis 6: What is the relationship between atomic number (number of protons) and the number of elements interacted with? This analysis asks if the atomic number (number of protons) can explain the number of elements interacted with.


First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with both axes untransformed; all diagnostics are about the same, so we’ll use the regular data.


# Yes
lm(n_elements ~ number_of_protons, data = element_networks_info) -> model_plain
performance::check_model(model_plain)

# No
#lm(log(n_elements) ~ number_of_protons, data = element_networks_info) -> model_ylog
#performance::check_model(model_ylog)

# No
#lm(n_elements ~ log(number_of_protons), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)

# No
#lm(log(n_elements) ~ log(number_of_protons), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)



The resulting model is as follows - THERE IS A NEGATIVE RELATIONSHIP.

get_model_info(model_plain, "number_of_protons") -> model_info

make_plot(
  element_networks_info,
  n_elements,
  number_of_protons,
  model_info,
  "Number of Elements",
  "Atomic number", # variable is number_of_protons which is equivalent
   c("As", "Pb", "Cu", "S", "O", "Fe", "Mn", "P", "Zn", "Ni", "Co", "Br", "Sc", "Yb", "Ga", "C", "H", "N", "Bi", "U")
) -> model6_plot

# Mo is specifically annoying to label because it's ON the regression line
model6_plot +
  geom_text(
    aes(label = ifelse(element == "Mo", "Mo", "")),
    nudge_y = 3,
    fontface = "bold",
    color = label.color,
    size = label.size) -> model6_plot
model6_plot



Figure export

Here we export figures to PDF files for use in manuscript.

Manuscript Figure 1

Figure 1 is two panels comprised of model1_plot and model2_plot.

fig1_grid <- plot_grid(model1_plot, model2_plot, labels = "AUTO", nrow = 2)
save_plot(ms_fig1_file, fig1_grid, base_height = 10, base_width = 7)

Figure 2 is two panels comprised of model3_plot and model6_plot.

fig2_grid <- plot_grid(model3_plot, model6_plot, labels = "AUTO", nrow = 2)
save_plot(ms_fig2_file, fig2_grid, base_height = 10, base_width = 7)

Session Info

The following shows the R version and package versions loaded for this analysis to enable reproducibility.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.20           cowplot_1.1.1     ggrepel_0.9.1     ggtext_0.1.1     
##  [5] performance_0.8.0 broom_0.7.12      dragon_1.2.0      forcats_0.5.1    
##  [9] stringr_1.4.0     dplyr_1.0.8       purrr_0.3.4       readr_2.1.2      
## [13] tidyr_1.2.0       tibble_3.1.6      ggplot2_3.3.5     tidyverse_1.3.1  
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3     ellipsis_0.3.2       rprojroot_2.0.2     
##   [4] markdown_1.1         fs_1.5.2             gridtext_0.1.4      
##   [7] rstudioapi_0.13      farver_2.1.0         roxygen2_7.1.2      
##  [10] remotes_2.4.2        bit64_4.0.5          golem_0.3.1         
##  [13] fansi_1.0.2          lubridate_1.8.0      xml2_1.3.3          
##  [16] splines_4.1.2        knitr_1.37           config_0.3.1        
##  [19] pkgload_1.2.4        jsonlite_1.8.0       dbplyr_2.1.1        
##  [22] shinydashboard_0.7.2 shiny_1.7.1          compiler_4.1.2      
##  [25] httr_1.4.2           backports_1.4.1      Matrix_1.4-0        
##  [28] assertthat_0.2.1     fastmap_1.1.0        cli_3.2.0           
##  [31] later_1.3.0          htmltools_0.5.2      prettyunits_1.1.1   
##  [34] tools_4.1.2          gtable_0.3.0         glue_1.6.2          
##  [37] Rcpp_1.0.8           cellranger_1.1.0     jquerylib_0.1.4     
##  [40] vctrs_0.3.8          nlme_3.1-155         crosstalk_1.2.0     
##  [43] insight_0.16.0       xfun_0.29            ps_1.6.0            
##  [46] brio_1.1.3           testthat_3.1.2       rvest_1.0.2         
##  [49] mime_0.12            lifecycle_1.0.1      scales_1.1.1        
##  [52] vroom_1.5.7          ragg_1.2.2           hms_1.1.1           
##  [55] promises_1.2.0.1     parallel_4.1.2       yaml_2.3.5          
##  [58] see_0.6.9            sass_0.4.0           stringi_1.7.6       
##  [61] highr_0.9            bayestestR_0.11.5    desc_1.4.0          
##  [64] pkgbuild_1.3.1       attempt_0.3.1        systemfonts_1.0.4   
##  [67] rlang_1.0.1          pkgconfig_2.0.3      lattice_0.20-45     
##  [70] evaluate_0.15        labeling_0.4.2       patchwork_1.1.1     
##  [73] htmlwidgets_1.5.4    bit_4.0.4            processx_3.5.2      
##  [76] tidyselect_1.1.2     magrittr_2.0.2       R6_2.5.1            
##  [79] generics_0.1.2       DBI_1.1.2            pillar_1.7.0        
##  [82] haven_2.4.3          withr_2.4.3          mgcv_1.8-39         
##  [85] datawizard_0.2.3     modelr_0.1.8         crayon_1.5.0        
##  [88] shinyWidgets_0.6.4   utf8_1.2.2           tzdb_0.2.0          
##  [91] rmarkdown_2.11       usethis_2.1.5        grid_4.1.2          
##  [94] readxl_1.3.1         callr_3.7.0          reprex_2.0.1        
##  [97] digest_0.6.29        xtable_1.8-4         httpuv_1.6.5        
## [100] textshaping_0.3.6    munsell_0.5.0        dockerfiler_0.1.4   
## [103] bslib_0.3.1